{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Setup OpenEBS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "cluster-cpu-nodes-Node\ti-0bf31bc06d431a3b4\tus-west-2a\n",
      "cluster-cpu-nodes-Node\ti-0e1e3cc267f54e715\tus-west-2c\n",
      "cluster-cpu-nodes-Node\ti-0817a7d327859fae3\tus-west-2c\n",
      "cluster-cpu-nodes-Node\ti-08612759f8e75d6e4\tus-west-2d\n"
     ]
    }
   ],
   "source": [
    "!aws ec2 describe-instances \\\n",
    "        --query 'Reservations[].Instances[].[Tags[?Key==`Name`].Value[] | [0], InstanceId, Placement.AvailabilityZone]' \\\n",
    "        --output text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "i-0bf31bc06d431a3b4\n",
      "cluster-cpu-nodes-Node\n",
      "vol-09722e1b454447a05\n",
      "i-0e1e3cc267f54e715\n",
      "cluster-cpu-nodes-Node\n",
      "vol-007750725871cd26e\tvol-04a1b2bd8346671c1\tvol-0f76f1aee5701e5a6\n",
      "i-0817a7d327859fae3\n",
      "cluster-cpu-nodes-Node\n",
      "vol-09c3a0c18bae7b896\tvol-06ee4851914ec0a6f\n",
      "i-08612759f8e75d6e4\n",
      "cluster-cpu-nodes-Node\n",
      "vol-06496a8c33dbdd9a8\tvol-0aa9c83fe4f5b4c8e\tvol-0fe83c47252fb76b3\n"
     ]
    }
   ],
   "source": [
    "!aws ec2 describe-instances \\\n",
    "    --query 'Reservations[*].Instances[*].[Tags[?Key==`Name`].Value,InstanceId,BlockDeviceMappings[*].Ebs.VolumeId]' \\\n",
    "    --output text"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Install OpenEBS\n",
    "https://docs.openebs.io/docs/next/uglocalpv-hostpath.html#create-storageclass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# DEFAULT: The absolute path on the node where the Hostpath directory of a Local PV Volume will be created.\n",
    "OPENEBS_IO_LOCALPV_HOSTPATH_DIR='/var/openebs/local'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "--2020-10-29 10:10:17--  https://openebs.github.io/charts/openebs-operator.yaml\n",
      "Resolving openebs.github.io (openebs.github.io)... 185.199.110.153, 185.199.108.153, 185.199.109.153, ...\n",
      "Connecting to openebs.github.io (openebs.github.io)|185.199.110.153|:443... connected.\n",
      "HTTP request sent, awaiting response... 200 OK\n",
      "Length: 28070 (27K) [text/yaml]\n",
      "Saving to: ‘openebs-operator.yaml.1’\n",
      "\n",
      "openebs-operator.ya 100%[===================>]  27.41K  --.-KB/s    in 0.009s  \n",
      "\n",
      "2020-10-29 10:10:17 (3.14 MB/s) - ‘openebs-operator.yaml.1’ saved [28070/28070]\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# -O overwrites the local copy on re-run instead of saving openebs-operator.yaml.1, .2, ...\n",
    "!wget -O openebs-operator.yaml https://openebs.github.io/charts/openebs-operator.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pygmentize openebs-operator.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "namespace/openebs created\n",
      "serviceaccount/openebs-maya-operator created\n",
      "clusterrole.rbac.authorization.k8s.io/openebs-maya-operator created\n",
      "clusterrolebinding.rbac.authorization.k8s.io/openebs-maya-operator created\n",
      "deployment.apps/maya-apiserver created\n",
      "service/maya-apiserver-service created\n",
      "deployment.apps/openebs-provisioner created\n",
      "deployment.apps/openebs-snapshot-operator created\n",
      "configmap/openebs-ndm-config created\n",
      "daemonset.apps/openebs-ndm created\n",
      "deployment.apps/openebs-ndm-operator created\n",
      "deployment.apps/openebs-admission-server created\n",
      "deployment.apps/openebs-localpv-provisioner created\n"
     ]
    }
   ],
   "source": [
    "!kubectl apply -f https://openebs.github.io/charts/openebs-operator.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                                           READY   STATUS    RESTARTS   AGE\n",
      "maya-apiserver-649b4db7d4-xfztk                0/1     Running   0          8s\n",
      "openebs-admission-server-5cff586c89-254c2      1/1     Running   0          8s\n",
      "openebs-localpv-provisioner-6498bddcff-bjdlm   1/1     Running   0          8s\n",
      "openebs-ndm-chn4f                              1/1     Running   0          8s\n",
      "openebs-ndm-kfv2h                              1/1     Running   0          7s\n",
      "openebs-ndm-operator-68bc9775cb-r2gf7          0/1     Running   0          8s\n",
      "openebs-ndm-rbfqd                              1/1     Running   0          8s\n",
      "openebs-ndm-sxqfk                              1/1     Running   0          8s\n",
      "openebs-provisioner-69489fb565-v6fkz           1/1     Running   0          8s\n",
      "openebs-snapshot-operator-5c758ff48f-cj859     2/2     Running   0          8s\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pods -n openebs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create StorageClass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[94mapiVersion\u001b[39;49;00m: storage.k8s.io/v1\n",
      "\u001b[94mkind\u001b[39;49;00m: StorageClass\n",
      "\u001b[94mmetadata\u001b[39;49;00m:\n",
      "  \u001b[94mname\u001b[39;49;00m: local-hostpath\n",
      "  \u001b[94mannotations\u001b[39;49;00m:\n",
      "    \u001b[94mopenebs.io/cas-type\u001b[39;49;00m: local\n",
      "    \u001b[94mcas.openebs.io/config\u001b[39;49;00m: |\n",
      "      \u001b[31m- name: StorageType\u001b[39;49;00m\n",
      "        \u001b[31mvalue: hostpath\u001b[39;49;00m\n",
      "      \u001b[31m- name: BasePath\u001b[39;49;00m\n",
      "        \u001b[31mvalue: /opt/ml\u001b[39;49;00m\n",
      "\u001b[94mprovisioner\u001b[39;49;00m: openebs.io/local\n",
      "\u001b[94mreclaimPolicy\u001b[39;49;00m: Delete\n",
      "\u001b[94mvolumeBindingMode\u001b[39;49;00m: WaitForFirstConsumer\n"
     ]
    }
   ],
   "source": [
    "!pygmentize eks-openebs/local-hostpath-sc.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!kubectl delete sc local-hostpath"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "storageclass.storage.k8s.io/local-hostpath created\n"
     ]
    }
   ],
   "source": [
    "# The manifest lives under eks-openebs/ (same path the pygmentize cell above displays).\n",
    "!kubectl create -f eks-openebs/local-hostpath-sc.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "apiVersion: storage.k8s.io/v1\n",
      "kind: StorageClass\n",
      "metadata:\n",
      "  annotations:\n",
      "    cas.openebs.io/config: |\n",
      "      - name: StorageType\n",
      "        value: hostpath\n",
      "      - name: BasePath\n",
      "        value: /opt/ml\n",
      "    openebs.io/cas-type: local\n",
      "  creationTimestamp: \"2020-10-29T10:28:34Z\"\n",
      "  name: local-hostpath\n",
      "  resourceVersion: \"28589143\"\n",
      "  selfLink: /apis/storage.k8s.io/v1/storageclasses/local-hostpath\n",
      "  uid: 199b866f-a1cd-4d7d-9c70-bb79e7eb907d\n",
      "provisioner: openebs.io/local\n",
      "reclaimPolicy: Delete\n",
      "volumeBindingMode: WaitForFirstConsumer\n"
     ]
    }
   ],
   "source": [
    "!kubectl get sc local-hostpath -o yaml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Verify the provisioner pod and StorageClass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                                           READY   STATUS    RESTARTS   AGE\n",
      "openebs-localpv-provisioner-6498bddcff-bjdlm   1/1     Running   0          18m\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pods -n openebs -l openebs.io/component-name=openebs-localpv-provisioner"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                        PROVISIONER                                                RECLAIMPOLICY   VOLUMEBINDINGMODE      ALLOWVOLUMEEXPANSION   AGE\n",
      "gp2 (default)               kubernetes.io/aws-ebs                                      Delete          WaitForFirstConsumer   false                  53d\n",
      "local-hostpath              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  43s\n",
      "openebs-device              openebs.io/local                                           Delete          WaitForFirstConsumer   false                  30m\n",
      "openebs-hostpath            openebs.io/local                                           Delete          WaitForFirstConsumer   false                  18m\n",
      "openebs-jiva-default        openebs.io/provisioner-iscsi                               Delete          Immediate              false                  30m\n",
      "openebs-snapshot-promoter   volumesnapshot.external-storage.k8s.io/snapshot-promoter   Delete          Immediate              false                  30m\n"
     ]
    }
   ],
   "source": [
    "!kubectl get sc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create a PersistentVolumeClaim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[94mkind\u001b[39;49;00m: PersistentVolumeClaim\n",
      "\u001b[94mapiVersion\u001b[39;49;00m: v1\n",
      "\u001b[94mmetadata\u001b[39;49;00m:\n",
      "  \u001b[94mname\u001b[39;49;00m: local-hostpath-pvc\n",
      "\u001b[94mspec\u001b[39;49;00m:\n",
      "  \u001b[94mstorageClassName\u001b[39;49;00m: local-hostpath\n",
      "  \u001b[94maccessModes\u001b[39;49;00m:\n",
      "    - ReadWriteOnce\n",
      "  \u001b[94mresources\u001b[39;49;00m:\n",
      "    \u001b[94mrequests\u001b[39;49;00m:\n",
      "      \u001b[94mstorage\u001b[39;49;00m: 5G\n"
     ]
    }
   ],
   "source": [
    "!pygmentize eks-openebs/local-hostpath-pvc.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "persistentvolumeclaim/local-hostpath-pvc created\n"
     ]
    }
   ],
   "source": [
    "!kubectl apply -f eks-openebs/local-hostpath-pvc.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                 STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE\n",
      "local-hostpath-pvc   Pending                                      local-hostpath   4s\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pvc local-hostpath-pvc"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Create Pod to consume OpenEBS Local PV Hostpath Storage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 112,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                                                           READY   STATUS      RESTARTS   AGE\n",
      "admission-webhook-bootstrap-stateful-set-0                     1/1     Running     6          53d\n",
      "admission-webhook-deployment-59bc556b94-x544t                  1/1     Running     0          13h\n",
      "alb-ingress-controller-798fbd8c96-b6bh4                        1/1     Running     2          53d\n",
      "application-controller-stateful-set-0                          1/1     Running     0          53d\n",
      "argo-ui-5f845464d7-k28qc                                       1/1     Running     0          53d\n",
      "bert-ml-pod                                                    0/1     Error       0          175m\n",
      "bert-ml-pod-eks                                                0/1     Completed   0          98m\n",
      "centraldashboard-d5c6d6bf-5scrq                                1/1     Running     0          53d\n",
      "jupyter-web-app-deployment-544b7d5684-6r7g4                    1/1     Running     0          53d\n",
      "katib-controller-6b87947df8-hpnsk                              1/1     Running     1          53d\n",
      "katib-db-manager-54b64f99b-n49mb                               1/1     Running     0          53d\n",
      "katib-mysql-74747879d7-mw9rt                                   1/1     Running     0          53d\n",
      "katib-ui-76f84754b6-vtcqd                                      1/1     Running     0          53d\n",
      "kfserving-controller-manager-0                                 2/2     Running     1          53d\n",
      "metacontroller-0                                               1/1     Running     0          53d\n",
      "metadata-db-79d6cf9d94-fjtlw                                   1/1     Running     0          53d\n",
      "metadata-deployment-5dd4c9d4cf-sdqfx                           1/1     Running     0          53d\n",
      "metadata-envoy-deployment-5b9f9466d9-pq4sq                     1/1     Running     0          53d\n",
      "metadata-grpc-deployment-74f69954dc-dwng6                      1/1     Running     3          53d\n",
      "metadata-ui-8968fc7d9-2sftt                                    1/1     Running     0          53d\n",
      "minio-6b88d6499f-s82mk                                         1/1     Running     0          53d\n",
      "ml-pipeline-698bcdd747-2wrfn                                   1/1     Running     0          53d\n",
      "ml-pipeline-ml-pipeline-visualizationserver-675656df79-wpqjv   1/1     Running     0          53d\n",
      "ml-pipeline-persistenceagent-7785884886-2g4sh                  1/1     Running     0          53d\n",
      "ml-pipeline-scheduledworkflow-7b4cb5d959-dfptl                 1/1     Running     0          53d\n",
      "ml-pipeline-ui-5fbd94b9fb-rbkt6                                1/1     Running     0          53d\n",
      "ml-pipeline-viewer-controller-deployment-69fccfff8c-9hdq6      1/1     Running     0          53d\n",
      "mpi-operator-6494747f88-q5k9t                                  1/1     Running     0          53d\n",
      "mysql-7994454666-8sxsk                                         1/1     Running     0          53d\n",
      "notebook-controller-deployment-576589db9d-mjvxv                1/1     Running     0          53d\n",
      "nvidia-device-plugin-daemonset-2nwdj                           1/1     Running     0          53d\n",
      "nvidia-device-plugin-daemonset-4txwf                           1/1     Running     0          53d\n",
      "nvidia-device-plugin-daemonset-dg6zb                           1/1     Running     0          53d\n",
      "nvidia-device-plugin-daemonset-vx666                           1/1     Running     0          53d\n",
      "profiles-deployment-5f77bb94c6-6h6mp                           2/2     Running     0          53d\n",
      "pytorch-operator-666dd4cd49-8jm46                              1/1     Running     5          53d\n",
      "seldon-controller-manager-5d96986d47-xw85h                     1/1     Running     2          53d\n",
      "spark-operatorcrd-cleanup-hgmnn                                0/2     Completed   0          53d\n",
      "spark-operatorsparkoperator-7c484c6859-zz6cc                   1/1     Running     0          53d\n",
      "tensorboard-6549cd78c9-gqcxt                                   1/1     Running     0          53d\n",
      "tf-job-operator-7574b968b5-p6ntx                               1/1     Running     4          53d\n",
      "workflow-controller-6db95548dd-wjqsm                           1/1     Running     0          53d\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error from server (NotFound): pods \"bert\" not found\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pods bert-ml-pod-openebs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 89,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pod \"bert\" deleted\n"
     ]
    }
   ],
   "source": [
    "!kubectl delete pod bert-ml-pod-openebs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 92,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pod/bert-ml-pod created\n"
     ]
    }
   ],
   "source": [
    "!kubectl apply -f eks-openebs/bert-ml-pod-openebs.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 87,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Error from server (BadRequest): previous terminated container \"bert\" in pod \"bert\" not found\n"
     ]
    }
   ],
   "source": [
    "!kubectl logs -p bert-ml-pod-openebs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 88,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name:         bert\n",
      "Namespace:    kubeflow\n",
      "Priority:     0\n",
      "Node:         ip-192-168-67-206.us-west-2.compute.internal/192.168.67.206\n",
      "Start Time:   Thu, 29 Oct 2020 10:40:38 +0000\n",
      "Labels:       <none>\n",
      "Annotations:  kubectl.kubernetes.io/last-applied-configuration:\n",
      "                {\"apiVersion\":\"v1\",\"kind\":\"Pod\",\"metadata\":{\"annotations\":{},\"name\":\"bert\",\"namespace\":\"kubeflow\"},\"spec\":{\"containers\":[{\"command\":[\"pyth...\n",
      "              kubernetes.io/psp: eks.privileged\n",
      "Status:       Failed\n",
      "IP:           192.168.68.249\n",
      "Containers:\n",
      "  bert:\n",
      "    Container ID:  docker://5f3fd1e1ee40b54dd8ded73d9f62fe7cf33c22e2241d2a2291b209526257487e\n",
      "    Image:         231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert\n",
      "    Image ID:      docker-pullable://231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo@sha256:4bf49dbd7bdb5ab8a4c76086622ccfaa893690047c744d68a93d9ee577f1720f\n",
      "    Port:          <none>\n",
      "    Host Port:     <none>\n",
      "    Command:\n",
      "      python\n",
      "      /opt/ml/code/train.py\n",
      "      --train_steps_per_epoch=1\n",
      "      --epochs=1\n",
      "      --learning_rate=0.00001\n",
      "      --epsilon=0.00000001\n",
      "      --train_batch_size=36\n",
      "      --validation_batch_size=18\n",
      "      --test_batch_size=18\n",
      "      --train_steps_per_epoch=1\n",
      "      --validation_steps=1\n",
      "      --test_steps=1\n",
      "      --use_xla=True\n",
      "      --use_amp=False\n",
      "      --max_seq_length=64\n",
      "      --freeze_bert_layer=True\n",
      "      --enable_sagemaker_debugger=False\n",
      "      --enable_checkpointing=False\n",
      "      --enable_tensorboard=False\n",
      "      --run_validation=True\n",
      "      --run_test=False\n",
      "      --run_sample_predictions=True\n",
      "    State:          Terminated\n",
      "      Reason:       Error\n",
      "      Exit Code:    2\n",
      "      Started:      Thu, 29 Oct 2020 10:40:39 +0000\n",
      "      Finished:     Thu, 29 Oct 2020 10:40:39 +0000\n",
      "    Ready:          False\n",
      "    Restart Count:  0\n",
      "    Environment:\n",
      "      SM_TRAINING_ENV:        {\"is_master\":true}\n",
      "      SAGEMAKER_JOB_NAME:     tf-bert-training-eks\n",
      "      SM_CURRENT_HOST:        localhost\n",
      "      SM_NUM_GPUS:            0\n",
      "      SM_HOSTS:               {\"hosts\":\"localhost\"}\n",
      "      SM_MODEL_DIR:           /opt/ml/model/\n",
      "      SM_OUTPUT_DIR:          /opt/ml/output/\n",
      "      SM_OUTPUT_DATA_DIR:     /opt/ml/output/data/\n",
      "      SM_CHANNEL_TRAIN:       /opt/ml/input/data/train\n",
      "      SM_CHANNEL_VALIDATION:  /opt/ml/input/data/validation\n",
      "      SM_CHANNEL_TEST:        /opt/ml/input/data/test\n",
      "    Mounts:\n",
      "      /opt/ml from local-storage (rw)\n",
      "      /var/run/secrets/kubernetes.io/serviceaccount from default-token-nnkjh (ro)\n",
      "Conditions:\n",
      "  Type              Status\n",
      "  Initialized       True \n",
      "  Ready             False \n",
      "  ContainersReady   False \n",
      "  PodScheduled      True \n",
      "Volumes:\n",
      "  local-storage:\n",
      "    Type:       PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)\n",
      "    ClaimName:  local-hostpath-pvc\n",
      "    ReadOnly:   false\n",
      "  default-token-nnkjh:\n",
      "    Type:        Secret (a volume populated by a Secret)\n",
      "    SecretName:  default-token-nnkjh\n",
      "    Optional:    false\n",
      "QoS Class:       BestEffort\n",
      "Node-Selectors:  <none>\n",
      "Tolerations:     node.kubernetes.io/not-ready:NoExecute for 300s\n",
      "                 node.kubernetes.io/unreachable:NoExecute for 300s\n",
      "Events:\n",
      "  Type    Reason     Age    From                                                   Message\n",
      "  ----    ------     ----   ----                                                   -------\n",
      "  Normal  Scheduled  8m32s  default-scheduler                                      Successfully assigned kubeflow/bert to ip-192-168-67-206.us-west-2.compute.internal\n",
      "  Normal  Pulling    8m31s  kubelet, ip-192-168-67-206.us-west-2.compute.internal  Pulling image \"231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert\"\n",
      "  Normal  Pulled     8m31s  kubelet, ip-192-168-67-206.us-west-2.compute.internal  Successfully pulled image \"231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert\"\n",
      "  Normal  Created    8m31s  kubelet, ip-192-168-67-206.us-west-2.compute.internal  Created container bert\n",
      "  Normal  Started    8m31s  kubelet, ip-192-168-67-206.us-west-2.compute.internal  Started container bert\n"
     ]
    }
   ],
   "source": [
    "!kubectl describe pod bert-ml-pod-openebs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!pygmentize eks-openebs/bert-ml-pod-openebs.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 76,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "pod \"bert\" deleted\n"
     ]
    }
   ],
   "source": [
    "!kubectl delete -f eks-openebs/bert-ml-pod-openebs.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME          READY   STATUS    RESTARTS   AGE\n",
      "bert-ml-pod   1/1     Running   0          9s\n",
      "Error from server (NotFound): pods \"created\" not found\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pod bert-ml-pod-openebs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 74,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Name:         bert\n",
      "Namespace:    kubeflow\n",
      "Priority:     0\n",
      "Node:         ip-192-168-67-206.us-west-2.compute.internal/192.168.67.206\n",
      "Start Time:   Thu, 29 Oct 2020 10:36:59 +0000\n",
      "Labels:       <none>\n",
      "Annotations:  kubectl.kubernetes.io/last-applied-configuration:\n",
      "                {\"apiVersion\":\"v1\",\"kind\":\"Pod\",\"metadata\":{\"annotations\":{},\"name\":\"bert\",\"namespace\":\"kubeflow\"},\"spec\":{\"containers\":[{\"command\":[\"pyth...\n",
      "              kubernetes.io/psp: eks.privileged\n",
      "Status:       Failed\n",
      "IP:           192.168.68.249\n",
      "Containers:\n",
      "  bert:\n",
      "    Container ID:  docker://1fdf43c6b3513c8ab6fa3b3b784611808c687eb0b4356af9661f8ac05c5d807f\n",
      "    Image:         231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert\n",
      "    Image ID:      docker-pullable://231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo@sha256:4bf49dbd7bdb5ab8a4c76086622ccfaa893690047c744d68a93d9ee577f1720f\n",
      "    Port:          <none>\n",
      "    Host Port:     <none>\n",
      "    Command:\n",
      "      python\n",
      "      /opt/ml/code/train.py\n",
      "      --train_steps_per_epoch=1\n",
      "      --epochs=1\n",
      "      --learning_rate=0.00001\n",
      "      --epsilon=0.00000001\n",
      "      --train_batch_size=36\n",
      "      --validation_batch_size=18\n",
      "      --test_batch_size=18\n",
      "      --train_steps_per_epoch=1\n",
      "      --validation_steps=1\n",
      "      --test_steps=1\n",
      "      --use_xla=True\n",
      "      --use_amp=False\n",
      "      --max_seq_length=64\n",
      "      --freeze_bert_layer=True\n",
      "      --enable_sagemaker_debugger=False\n",
      "      --enable_checkpointing=False\n",
      "      --enable_tensorboard=False\n",
      "      --run_validation=True\n",
      "      --run_test=False\n",
      "      --run_sample_predictions=True\n",
      "    State:          Terminated\n",
      "      Reason:       Error\n",
      "      Exit Code:    2\n",
      "      Started:      Thu, 29 Oct 2020 10:37:00 +0000\n",
      "      Finished:     Thu, 29 Oct 2020 10:37:00 +0000\n",
      "    Ready:          False\n",
      "    Restart Count:  0\n",
      "    Environment:\n",
      "      SM_TRAINING_ENV:        {\"is_master\":true}\n",
      "      SAGEMAKER_JOB_NAME:     tf-bert-training-eks\n",
      "      SM_CURRENT_HOST:        localhost\n",
      "      SM_NUM_GPUS:            0\n",
      "      SM_HOSTS:               {\"hosts\":\"localhost\"}\n",
      "      SM_MODEL_DIR:           /opt/ml/model/\n",
      "      SM_OUTPUT_DIR:          /opt/ml/output/\n",
      "      SM_OUTPUT_DATA_DIR:     /opt/ml/output/data/\n",
      "      SM_CHANNEL_TRAIN:       /opt/ml/input/data/train\n",
      "      SM_CHANNEL_VALIDATION:  /opt/ml/input/data/validation\n",
      "      SM_CHANNEL_TEST:        /opt/ml/input/data/test\n",
      "    Mounts:\n",
      "      /opt/ml from local-storage (rw)\n",
      "      /var/run/secrets/kubernetes.io/serviceaccount from default-token-nnkjh (ro)\n",
      "Conditions:\n",
      "  Type              Status\n",
      "  Initialized       True \n",
      "  Ready             False \n",
      "  ContainersReady   False \n",
      "  PodScheduled      True \n",
      "Volumes:\n",
      "  local-storage:\n",
      "    Type:       PersistentVolumeClaim (a reference to a PersistentVolumeClaim in the same namespace)\n",
      "    ClaimName:  local-hostpath-pvc\n",
      "    ReadOnly:   false\n",
      "  default-token-nnkjh:\n",
      "    Type:        Secret (a volume populated by a Secret)\n",
      "    SecretName:  default-token-nnkjh\n",
      "    Optional:    false\n",
      "QoS Class:       BestEffort\n",
      "Node-Selectors:  <none>\n",
      "Tolerations:     node.kubernetes.io/not-ready:NoExecute for 300s\n",
      "                 node.kubernetes.io/unreachable:NoExecute for 300s\n",
      "Events:\n",
      "  Type    Reason     Age   From                                                   Message\n",
      "  ----    ------     ----  ----                                                   -------\n",
      "  Normal  Scheduled  12s   default-scheduler                                      Successfully assigned kubeflow/bert to ip-192-168-67-206.us-west-2.compute.internal\n",
      "  Normal  Pulling    11s   kubelet, ip-192-168-67-206.us-west-2.compute.internal  Pulling image \"231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert\"\n",
      "  Normal  Pulled     11s   kubelet, ip-192-168-67-206.us-west-2.compute.internal  Successfully pulled image \"231218423789.dkr.ecr.us-west-2.amazonaws.com/dlc-demo:bert\"\n",
      "  Normal  Created    11s   kubelet, ip-192-168-67-206.us-west-2.compute.internal  Created container bert\n",
      "  Normal  Started    11s   kubelet, ip-192-168-67-206.us-west-2.compute.internal  Started container bert\n"
     ]
    }
   ],
   "source": [
    "!kubectl describe pod bert-ml-pod-openebs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "NAME                 STATUS    VOLUME   CAPACITY   ACCESS MODES   STORAGECLASS     AGE\n",
      "local-hostpath-pvc   Pending                                      local-hostpath   4m57s\n"
     ]
    }
   ],
   "source": [
    "!kubectl get pvc local-hostpath-pvc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!kubectl get pv pvc-864a5ac8-dd3f-416b-9f4b-ffd7d285b425 -o yaml"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Delete"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!kubectl delete -f eks-openebs/bert-ml-pod-openebs.yaml"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!kubectl delete pvc local-hostpath-pvc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!kubectl delete sc local-hostpath"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!kubectl delete -f https://openebs.github.io/charts/openebs-operator.yaml"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
